1 Introduction

In this document, I will give a detailed illustration of the proposed parallel partial cokriging model with a toy example and storm surge simulations from the ADCIRC model and the SWAN+ADCIRC model. This document can be served as suplementary materials to reproduce the results in the main manuscript.

1.1 Install the ARCokrig Package

The parallel partial cokriging method is implemented in the ARCokrig R package. We first install the ARCokrig package.

## install relevant packages if not installed 
#install.packages(c("Rcpp", "RcppEigen", "RcppArmadillo", "RobustGaSP", "mvtnorm", "fields"))
## install ARCokrig package from local source
#install.packages("ARCokrig_0.1.0.tar.gz", repos=NULL, type="source")
## one can also install the package from github
library(devtools)
#install_github("pulongma/ARCokrig")
library(ARCokrig)

2 A Toy Example

After installing the package, we start with a toy example with two levels, where the high-fidelity code is a scale multiple of low-fidelity code plus a non-linear location discrepancy term. The design for level 1 code is set up as 20 points uniformly located in the interval [-1, 1] with spacing 0.1, which will be denoted as \(\mathcal{X}_1\). The design for the high-fidelity code is set up as \(\mathcal{X}_2=\{-1, -0.8, -0.55, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 1\}\). Hence, \(\mathcal{X}_2\) is not nested within \(\mathcal{X}_1\).

library(ggplot2)
library(ARCokrig)

#########################################################################
#########################################################################
## test function
fun.lf = function(x){
  return(0.5*(6*x-2)^2*sin(12*x-4)+10*(x-0.5)-5)
}

fun.hf = function(x){
  z1 = fun.lf(x)
  z2 = 2*z1-20*x+20 + sin(10*cos(5*x))
  return(z2)
}

## setup the design 
Dc = seq(-1,1,0.1)
Df = c(-1, -0.8, -0.55, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 1)

zc = fun.lf(Dc)
zf = fun.hf(Df)

## predictive inputs
input.new = as.matrix(seq(-1,1,le=200))

#########################################################################
#########################################################################
## perform the autoregressive cokriging  
output = list(as.matrix(zc), as.matrix(zf))
input = list(as.matrix(Dc), as.matrix(Df))

tuning = list(maxit=30, n.sample=30, verbose=TRUE)
obj = cokm(formula=list(~1,~1+x1), output=output, input=input,
           param=list(0.1,0.1), cov.model="matern_5_2", NestDesign=FALSE,
           tuning=tuning)
## 
##  Constructing cokm object for non-nested design.
obj = cokm.fit(obj)
## iter= 1 
## iter= 2 
## iter= 3
## nsample is set to be large, so that a nice plot is generated. 
## The improvement of prediction performance is negligible 
## by tuning the Monte Carlo sample size
pred = cokm.condsim(obj, input.new=input.new, nsample=1000)

pred.non = pred
yhat.l2 = pred.non$mu[[2]]
CI.lower = pred.non$lower95[[2]]
CI.upper = pred.non$upper95[[2]]

df.l1 = data.frame(x=c(Dc), y=c(zc))
df.l2 = data.frame(x=c(Df), y=c(zf))

#########################################################################
#########################################################################
### plottting 

g1 = ggplot(data.frame(x=c(-1,1)), aes(x)) + 
  stat_function(fun=fun.lf, geom="line", aes(colour="level 1"), n=500) +
  stat_function(fun=fun.hf, geom="line", aes(colour="level 2"), n=500)

g1 = g1 + geom_point(data=df.l1, mapping=aes(x=x, y=y), shape=16, size=2, color="black") + 
  geom_point(data=df.l2, mapping=aes(x=x, y=y), shape=17, size=2, color="black")

df.CI = data.frame(x=c(input.new),lower=CI.lower, upper=CI.upper)
df.pred = data.frame(x=c(input.new), y=yhat.l2)
g1 = g1 + geom_line(data=df.pred, aes(x=x, y=y, color="cokriging"))
g1 = g1 + geom_ribbon(data=df.CI, mapping=aes(x=x,ymin=lower, ymax=upper), fill="gray40",alpha=0.3)
g1 = g1 +   scale_colour_manual(name=NULL, values=c("red", "blue", "turquoise3"), breaks=c("cokriging", "level 1", "level 2"))
g1 = g1 +  
  theme(plot.title=element_text(size=14),
        axis.title.x=element_text(size=14),
        axis.text.x=element_text(size=14),
        axis.title.y=element_text(size=14),
        axis.text.y=element_text(size=14),
        legend.text = element_text(size=12),
        legend.direction = "horizontal",
        legend.position = c(0.6, 0.1)) + xlab("") + ylab("")
print(g1)

#########################################################################
#########################################################################
## Kriging with High-Fidelity Data 

library(RobustGaSP)
## #########
## ##
## ## Robust Gaussian Stochastic Process, RobustGaSP Package
## ## Copyright (C) 2016-2019 Mengyang Gu, Jesus Palomo and James O. Berger
## #########
## 
## Attaching package: 'RobustGaSP'
## The following object is masked from 'package:stats':
## 
##     simulate
H = cbind(rep(1, length.out=length(Df)), Df)
kfit = RobustGaSP::rgasp(design=Df, response=zf, trend=H)
## The upper bounds of the range parameters are 191.993 
## The initial values of range parameters are 3.839861 
## Start of the optimization  1  : 
## The number of interation is  11 
##  The value of the posterior is  -33.85983 
##  Optimized range parameters are 0.05462824 
##  Optimized nugget parameter is 0 
##  Convergence:  TRUE 
## The initial values of range parameters are 0.04 
## Start of the optimization  2  : 
## The number of interation is  7 
##  The value of the posterior is  -33.85983 
##  Optimized range parameters are 0.05462824 
##  Optimized nugget parameter is 0 
##  Convergence:  TRUE
Hp = cbind(rep(1, length.out=dim(input.new)[1]), input.new)
kpred = RobustGaSP::predict(object=kfit, testing_input=input.new,
                            testing_trend=Hp)

df.l0 = data.frame(x=input.new, y=kpred$mean)
df.l1 = data.frame(x=c(Dc), y=c(zc))
df.l2 = data.frame(x=c(Df), y=c(zf))


df.CI = data.frame(x=c(input.new),lower=kpred$lower95, upper=kpred$upper95)

## figures for kriging
g2 = ggplot(data.frame(x=c(-1,1)), aes(x)) + 
  stat_function(fun=fun.lf, geom="line", aes(colour="level 1"), n=500) +
  stat_function(fun=fun.lf, geom="line", aes(colour="level 2"), n=500) 
g2 = g2 + 
  geom_point(data=df.l2, mapping=aes(x=x, y=y), shape=17, size=2, color="black")

g2 = g2 + geom_line(data=df.l0, aes(x=x, y=y, colour="kriging"), 
                    inherit.aes=FALSE)
g2 = g2 + geom_ribbon(data=df.CI, mapping=aes(x=x,ymin=lower, ymax=upper), 
                      fill="gray40", alpha=0.3, inherit.aes=FALSE)
g2 = g2 +   scale_colour_manual(name=NULL, values=c("red", "blue", "turquoise3"), 
                                breaks=c( "kriging", "level 1", "level 2"))

g2 = g2 + 
  theme(plot.title=element_text(size=14),
        axis.title.x=element_text(size=14),
        axis.text.x=element_text(size=14),
        axis.title.y=element_text(size=14),
        axis.text.y=element_text(size=14),
        legend.text = element_text(size=12),
        legend.direction = "horizontal",
        legend.position = c(0.6, 0.1)) + xlab("") + ylab("")
print(g2)

3 Storm Surge Application

The storm surges are simulated from the ADCIRC model, and the SWAN + ADCIRC model, which will be referred to as the low-fidelity simulator and the high-fidelity simulator. The goal here is to demonstrate the propoased parallel partial cokriging model to emulate the high-fidelity simulator based on a very limited number of high-fidelity model runs and a small number of low-fidelity model runs.

library(ARCokrig)
## load storm surge simulations
load("StormSurge.RData")

## set up the design
n_runs = dim(inputs)[1]
k_outputs = dim(lonlat)[1]
set.seed(123)
id.input = 1:n_runs
id.output = 1:k_outputs 
id.training = sample(id.input, 60)
z.hf = PSE.hf[id.training, id.output]
set.seed(234)
id.remain = id.input[-id.training]
id.lf = c(sample(id.remain, 140), id.training)
z.lf = PSE.lf[id.lf, id.output]

# convert long/lat of lanfall to distance
library(fields)
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.2-1 (2018-12-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## Loading required package: maps
## See www.image.ucar.edu/~nychka/Fields for
##  a vignette and other supplements.
lon0 = min(inputs[ ,6])
lat0 = max(inputs[, 7])
reference = matrix(c(lon0, lat0), nrow=1)
dist = c(rdist.earth(reference, inputs[ ,c(6,7)]))
inputs.6Dim = cbind(inputs[ ,1:5], dist)

input.hf = inputs.6Dim[id.training, ]
input.lf = inputs.6Dim[id.lf, ]

input.max = apply(inputs.6Dim, 2, max)
input.min = apply(inputs.6Dim, 2, min)
delta = input.max - input.min

set.seed(1234)
id.h.l = c(1:140, 140 + sample(1:60, 50))
zc = z.lf[id.h.l, ]  
zf = z.hf
Dc = input.lf[id.h.l, ]
Df = input.hf
output = list(zc, zf)
input = list(Dc, Df)

inputdata.new = inputdata[-id.training, ]
input.new = inputs.6Dim[-id.training, ]
output.new = PSE.hf[-id.training, id.output]
output.new.lf = PSE.lf[-id.training, id.output]
n.new = dim(output.new.lf)[1]
input.new.orig = input.new

## construct the mvcokm object
cov.model = "matern_5_2"
formula = list(~1, ~1)

param = list(c(delta/4), c(delta/4))

opt = list(maxit=800)
prior = list(name="JR")
NestDesign = FALSE

obj = mvcokm(formula=formula, output=output, input=input, param=param,
             cov.model="matern_5_2",prior=prior,opt=opt,
             NestDesign=NestDesign)

## fit the PP cokriging model
# this step takes about 4.5 hours on a macbook pro
obj@tuning$maxit=20
obj@tuning$verbose = TRUE

#t1 = proc.time()
#obj.fitted = mvcokm.fit(obj)
#t.est = proc.time() - t1

# load the fitted data
load("PSE_fitted_data.RData")

## predict with the PP cokriging model

y.testing = output.new
z.testing = output.new.lf
n.new = dim(y.testing)[1]

# takes about 10 minutes for all 166 inputs
t1 = proc.time()
sim = mvcokm.condsim(obj=obj.fitted, input.new=input.new)
t.sim = proc.time() - t1
pred = sim

#########################################################################
#########################################################################
## compute predictive measures  (Table 2)

# a function to compute all the predictive measures
nump <- function(y.testing, y.pred, Lower95, Upper95, y.predSE){
  RMSPE=sqrt(mean((y.testing-y.pred)^2))
  PCI95 = mean( (Lower95<y.testing) & (Upper95>y.testing) )
  LCI95 = mean(Upper95 - Lower95)
  crps = mean(CRPS(y.testing, y.pred, y.predSE))
  y.testing.mean = apply(y.testing, 2, mean)
    y.training.mean = apply(zf, 2, mean)
  res = y.pred - matrix(rep(y.training.mean, 
        times=dim(y.testing)[1]), byrow=T, nrow=dim(y.testing)[1])
  NSME = 1 - sum((y.testing - y.pred)^2) / sum((res)^2)
  
  measure = c(RMSPE, PCI95, LCI95, crps, NSME)
  names(measure) <- c("RMSPE", "P95CI", "L95CI", "CRPS", "NSME")
  return(measure)
}

y.testing.overall = y.testing 
y.pred.overall = pred$mu[[2]]
Lower95.overall = pred$lower95[[2]]
Upper95.overall = pred$upper95[[2]]
y.predSE.overall = pred$SE[[2]]
y.SE = y.predSE.overall

measure.overall = nump(y.testing=y.testing.overall, y.pred=y.pred.overall,
                       Lower95=Lower95.overall, Upper95=Upper95.overall,
                       y.predSE=y.predSE.overall)
print(measure.overall)
##      RMSPE      P95CI      L95CI       CRPS       NSME 
## 0.04299089 0.99353273 0.30659752 0.05064446 0.99804014

Next, we visualize the results in the manuscript.

## setup the plot
library(ggplot2)
library(reshape2)
theme_mat = theme(axis.text=element_text(size=14),
                  axis.title=element_text(size=14),
                  axis.title.x=element_text(size=14),
                  axis.text.x=element_text(size=12),
                  axis.title.y=element_text(size=14),
                  axis.text.y=element_text(size=12),
                  legend.text=element_text(size=14),
                  legend.title=element_text(size=14),
                  plot.title=element_text(size=14)
)
source("map.surge.R")

y.pred = y.pred.overall
testing_input = input.new
testing_inputdata = inputdata.new
StormID = testing_inputdata$orig_name

## focus on two storms in the figures
indA=which(StormID=="LHS4_A_001545_000014")
print(indA)
## [1] 54
indB=which(StormID=="LHS4_A_002444_000011")
print(indB)
## [1] 161
#########################################################################
#########################################################################
#### Comparison of model runs (Figure 2)
## model runs from the ADCIRC model
i=indA
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=z.testing[i,])
zlim = c(0.6, 4)
Lflocs = as.matrix(testing_inputdata[i, c(8,9)])
g.l = map.surge(data.PSE=df.PSE, 
                zlim=zlim, breaks=c(0.6, 2.3,  4), legendTitle="m")
## Loading required package: sp
## Loading required package: maptools
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
## Loading required package: scales
print(g.l)

## model run from the SWAN + ADCIRC model
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=y.testing[i, ])
Lflocs = as.matrix(testing_inputdata[i, c(8,9)])

g.h = map.surge(data.PSE=df.PSE, 
                zlim=zlim, breaks=c(0.6, 2.3, 4),
                legendTitle="m")
print(g.h)

## different between high-fidelity run and low-fidelity run
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=y.testing[i, ] - z.testing[i, ])

g.hl = map.surge(data.PSE=df.PSE, 
                 zlim=c(0.04, 0.24), breaks=c(0.04, 0.14, 0.24),
                 legendTitle="m")
print(g.hl)

### model runs at another input setting
i=indB
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=z.testing[i,])
zlim = c(1.4, 6)
Lflocs = as.matrix(testing_inputdata[i, c(8,9)])
g.l = map.surge(data.PSE=df.PSE, 
                zlim=zlim, breaks=c(1.4, 3.7, 6), legendTitle="m")
print(g.l)

## model run from the SWAN + ADCIRC model
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=y.testing[i, ])
Lflocs = as.matrix(testing_inputdata[i, c(8,9)])

g.h = map.surge(data.PSE=df.PSE, 
                zlim=zlim, breaks=c(1.4, 3.7, 6),
                legendTitle="m")
print(g.h)

## different between high-fidelity run and low-fidelity run
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=output.new[i, ] - output.new.lf[i, ])

g.hl = map.surge(data.PSE=df.PSE, 
                 zlim=c(0.1, 0.26), breaks=c(0.1, 0.18, 0.26),
                 legendTitle="m")
print(g.hl)

#########################################################################
#########################################################################
#### prediction versus held-out output (Figure 4)
i=indA
PSE.df = data.frame(model=y.testing[i,], pred=y.pred[i, ])
g.scatter1 = ggplot(PSE.df, aes(x=model, y=pred)) + 
  geom_point(size=0.5, shape=1) + 
  labs(x="Model output", y="Predicted output")
g.scatter1 = g.scatter1 + geom_abline(intercept=0, slope=1) + 
  theme(plot.title=element_text(size=14),
        axis.title.x=element_text(size=14),
        axis.text.x=element_text(size=12),
        axis.title.y=element_text(size=14),
        axis.text.y=element_text(size=12),
        legend.text = element_text(size=12)
  )
print(g.scatter1)

i=indB
PSE.df = data.frame(model=y.testing[i,], pred=y.pred[i, ])
g.scatter2 = ggplot(PSE.df, aes(x=model, y=pred)) + 
  geom_point(size=0.5, shape=1) + 
  labs(x="Model output", y="Predicted output")
g.scatter2 = g.scatter2 + geom_abline(intercept=0, slope=1) + 
  theme(plot.title=element_text(size=14),
        axis.title.x=element_text(size=14),
        axis.text.x=element_text(size=12),
        axis.title.y=element_text(size=14),
        axis.text.y=element_text(size=12),
        legend.text = element_text(size=12)
  )
print(g.scatter2)

#########################################################################
#########################################################################
#### Emulation errors across all spatial locations (Figure 6)
## takes about 8 minutes to plot
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:maps':
## 
##     ozone
surge.error = y.pred - output.new
#N = 9284
N = dim(surge.error)[2]
ind.new = 1:dim(surge.error)[1]
y.error = c(surge.error[ind.new, ])

df = data.frame(x1=rep(input.new[ind.new,1], N),
                x2=rep(input.new[ind.new,2], N),
                x3=rep(input.new[ind.new,3], N),
                x4=rep(input.new[ind.new,4], N),
                x5=rep(input.new[ind.new,5], N),
                x2=rep(input.new[ind.new,6], N),
                y = y.error)
df.melt = melt(df, id="y")
df.melt$variable = revalue(df.melt$variable, 
                           c("x1"="central pressure deficit", 
                             "x2"="scale pressure radius",
                             "x3"="forward speed",
                             "x4"="storm's heading",
                             "x5"="Holland's B",
                             "x2.1"="landfall"))
g = ggplot(data=df.melt, aes(x=value, y=y, color=variable)) + 
  geom_point(shape=21, color="black", size=0.5, fill="lightgray") + 
  facet_wrap(variable ~ ., ncol=3, scale="free_x") + 
  geom_abline(intercept=0, slope=0) + 
  theme(plot.title=element_text(size=14),
        strip.text.x = element_text(size=12),
        axis.title.x=element_text(size=14),
        axis.text.x=element_text(size=14),
        axis.title.y=element_text(size=14),
        axis.text.y=element_text(size=14)) + xlab("") + ylab("")
print(g)

#########################################################################
#########################################################################
#### plot model parameters
library(ARCokrig)
param.list = mvcokm.param(obj.fitted)
b = param.list$coeff
sigma2 = param.list$var
gamma = b[[2]][2, ]

## figure for beta 
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=c(b[[1]]))

g.b1 = map.surge(data.PSE=df.PSE, 
                 zlim=c(0.5, 2), breaks=c(0.5, 1.25, 2),
                 legendTitle="")
print(g.b1)

df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=b[[2]][1,])
summary(df.PSE$value)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.074936 -0.003704  0.010790  0.019902  0.041660  0.270862
g.b2 = map.surge(data.PSE=df.PSE, 
                 zlim=c(0, 0.1), breaks=c(0, 0.05, 0.1),
                 legendTitle="")
print(g.b2)

## figure for gamma
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=b[[2]][2,])

g.gam = map.surge(data.PSE=df.PSE, 
                  zlim=c(1, 1.1), breaks=c(1, 1.05, 1.1),
                  legendTitle="")
print(g.gam)

## figures for sigma2
df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=sigma2[[1]]^0.5)

g.sig1 = map.surge(data.PSE=df.PSE, 
                   zlim=c(0.5, 1.2), breaks=c(0.5, 0.85, 1.2),
                   legendTitle="")
print(g.sig1)

df.PSE = data.frame(long=lonlat[ ,1],
                    lat=lonlat[ ,2],
                    value=sigma2[[2]]^0.5)

g.sig2 = map.surge(data.PSE=df.PSE, 
                   zlim=c(0.02, 0.08), breaks=c(0.02, 0.05, 0.08),
                   legendTitle="")
print(g.sig2)